Speaker: Ruth Chirinos

Data transformation

Prerequisites

# One-time setup: uncomment to install the two packages attached below.
# install.packages(c("tidyverse", "nycflights13"))
library(tidyverse)
library(nycflights13)

Data set nycflights13

# Print the flights data set.
flights
#
# Open the help page for the data set.
?flights

More ways to get basic information about a data frame:

names(flights)
 [1] "year"           "month"          "day"            "dep_time"       "sched_dep_time" "dep_delay"     
 [7] "arr_time"       "sched_arr_time" "arr_delay"      "carrier"        "flight"         "tailnum"       
[13] "origin"         "dest"           "air_time"       "distance"       "hour"           "minute"        
[19] "time_hour"     
dim(flights)    # ?dim dimension
[1] 336776     19
ncol(flights)   # ?ncol number of columns
[1] 19
nrow(flights)   # ?nrow number of rows
[1] 336776
summary(flights)
      year          month             day           dep_time    sched_dep_time   dep_delay          arr_time   
 Min.   :2013   Min.   : 1.000   Min.   : 1.00   Min.   :   1   Min.   : 106   Min.   : -43.00   Min.   :   1  
 1st Qu.:2013   1st Qu.: 4.000   1st Qu.: 8.00   1st Qu.: 907   1st Qu.: 906   1st Qu.:  -5.00   1st Qu.:1104  
 Median :2013   Median : 7.000   Median :16.00   Median :1401   Median :1359   Median :  -2.00   Median :1535  
 Mean   :2013   Mean   : 6.549   Mean   :15.71   Mean   :1349   Mean   :1344   Mean   :  12.64   Mean   :1502  
 3rd Qu.:2013   3rd Qu.:10.000   3rd Qu.:23.00   3rd Qu.:1744   3rd Qu.:1729   3rd Qu.:  11.00   3rd Qu.:1940  
 Max.   :2013   Max.   :12.000   Max.   :31.00   Max.   :2400   Max.   :2359   Max.   :1301.00   Max.   :2400  
                                                 NA's   :8255                  NA's   :8255      NA's   :8713  
 sched_arr_time   arr_delay          carrier              flight       tailnum             origin         
 Min.   :   1   Min.   : -86.000   Length:336776      Min.   :   1   Length:336776      Length:336776     
 1st Qu.:1124   1st Qu.: -17.000   Class :character   1st Qu.: 553   Class :character   Class :character  
 Median :1556   Median :  -5.000   Mode  :character   Median :1496   Mode  :character   Mode  :character  
 Mean   :1536   Mean   :   6.895                      Mean   :1972                                        
 3rd Qu.:1945   3rd Qu.:  14.000                      3rd Qu.:3465                                        
 Max.   :2359   Max.   :1272.000                      Max.   :8500                                        
                NA's   :9430                                                                              
     dest              air_time        distance         hour           minute        time_hour                  
 Length:336776      Min.   : 20.0   Min.   :  17   Min.   : 1.00   Min.   : 0.00   Min.   :2013-01-01 05:00:00  
 Class :character   1st Qu.: 82.0   1st Qu.: 502   1st Qu.: 9.00   1st Qu.: 8.00   1st Qu.:2013-04-04 13:00:00  
 Mode  :character   Median :129.0   Median : 872   Median :13.00   Median :29.00   Median :2013-07-03 10:00:00  
                    Mean   :150.7   Mean   :1040   Mean   :13.18   Mean   :26.23   Mean   :2013-07-03 05:22:54  
                    3rd Qu.:192.0   3rd Qu.:1389   3rd Qu.:17.00   3rd Qu.:44.00   3rd Qu.:2013-10-01 07:00:00  
                    Max.   :695.0   Max.   :4983   Max.   :23.00   Max.   :59.00   Max.   :2013-12-31 23:00:00  
                    NA's   :9430                                                                                
# One-time setup: uncomment to install skimr.
#install.packages('skimr')
library(skimr) 
# skim() prints a per-column summary, grouped by variable type.
skim(flights)
── Data Summary ────────────────────────
                           Values 
Name                       flights
Number of rows             336776 
Number of columns          19     
_______________________           
Column type frequency:            
  character                4      
  numeric                  14     
  POSIXct                  1      
________________________          
Group variables            None   

── Variable type: character ──────────────────────────────────────────────────────────────────────────────────────────
  skim_variable n_missing complete_rate   min   max empty n_unique whitespace
1 carrier               0         1         2     2     0       16          0
2 tailnum            2512         0.993     5     6     0     4043          0
3 origin                0         1         3     3     0        3          0
4 dest                  0         1         3     3     0      105          0

── Variable type: numeric ────────────────────────────────────────────────────────────────────────────────────────────
   skim_variable  n_missing complete_rate    mean      sd    p0   p25   p50   p75  p100 hist 
 1 year                   0         1     2013       0     2013  2013  2013  2013  2013 ▁▁▇▁▁
 2 month                  0         1        6.55    3.41     1     4     7    10    12 ▇▆▆▆▇
 3 day                    0         1       15.7     8.77     1     8    16    23    31 ▇▇▇▇▆
 4 dep_time            8255         0.975 1349.    488.       1   907  1401  1744  2400 ▁▇▆▇▃
 5 sched_dep_time         0         1     1344.    467.     106   906  1359  1729  2359 ▁▇▇▇▃
 6 dep_delay           8255         0.975   12.6    40.2    -43    -5    -2    11  1301 ▇▁▁▁▁
 7 arr_time            8713         0.974 1502.    533.       1  1104  1535  1940  2400 ▁▃▇▇▇
 8 sched_arr_time         0         1     1536.    497.       1  1124  1556  1945  2359 ▁▃▇▇▇
 9 arr_delay           9430         0.972    6.90   44.6    -86   -17    -5    14  1272 ▇▁▁▁▁
10 flight                 0         1     1972.   1632.       1   553  1496  3465  8500 ▇▃▃▁▁
11 air_time            9430         0.972  151.     93.7     20    82   129   192   695 ▇▂▂▁▁
12 distance               0         1     1040.    733.      17   502   872  1389  4983 ▇▃▂▁▁
13 hour                   0         1       13.2     4.66     1     9    13    17    23 ▁▇▇▇▅
14 minute                 0         1       26.2    19.3      0     8    29    44    59 ▇▃▆▃▅

── Variable type: POSIXct ────────────────────────────────────────────────────────────────────────────────────────────
  skim_variable n_missing complete_rate min                 max                 median              n_unique
1 time_hour             0             1 2013-01-01 05:00:00 2013-12-31 23:00:00 2013-07-03 10:00:00     6936

View the tibble (data frame)

# View() opens the data in a spreadsheet-style viewer; left disabled here.
#View(flights)
# glimpse() lists every column with its type and its first few values.
glimpse(flights)
Observations: 336,776
Variables: 19
$ year           <int> 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 20…
$ month          <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
$ day            <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
$ dep_time       <int> 517, 533, 542, 544, 554, 554, 555, 557, 557, 558, 558, 558, 558, 558, 559, 559, 559, 600, 60…
$ sched_dep_time <int> 515, 529, 540, 545, 600, 558, 600, 600, 600, 600, 600, 600, 600, 600, 600, 559, 600, 600, 60…
$ dep_delay      <dbl> 2, 4, 2, -1, -6, -4, -5, -3, -3, -2, -2, -2, -2, -2, -1, 0, -1, 0, 0, 1, -8, -3, -4, -4, 0, …
$ arr_time       <int> 830, 850, 923, 1004, 812, 740, 913, 709, 838, 753, 849, 853, 924, 923, 941, 702, 854, 851, 8…
$ sched_arr_time <int> 819, 830, 850, 1022, 837, 728, 854, 723, 846, 745, 851, 856, 917, 937, 910, 706, 902, 858, 8…
$ arr_delay      <dbl> 11, 20, 33, -18, -25, 12, 19, -14, -8, 8, -2, -3, 7, -14, 31, -4, -8, -7, 12, -6, -8, 16, -1…
$ carrier        <chr> "UA", "UA", "AA", "B6", "DL", "UA", "B6", "EV", "B6", "AA", "B6", "B6", "UA", "UA", "AA", "B…
$ flight         <int> 1545, 1714, 1141, 725, 461, 1696, 507, 5708, 79, 301, 49, 71, 194, 1124, 707, 1806, 1187, 37…
$ tailnum        <chr> "N14228", "N24211", "N619AA", "N804JB", "N668DN", "N39463", "N516JB", "N829AS", "N593JB", "N…
$ origin         <chr> "EWR", "LGA", "JFK", "JFK", "LGA", "EWR", "EWR", "LGA", "JFK", "LGA", "JFK", "JFK", "JFK", "…
$ dest           <chr> "IAH", "IAH", "MIA", "BQN", "ATL", "ORD", "FLL", "IAD", "MCO", "ORD", "PBI", "TPA", "LAX", "…
$ air_time       <dbl> 227, 227, 160, 183, 116, 150, 158, 53, 140, 138, 149, 158, 345, 361, 257, 44, 337, 152, 134,…
$ distance       <dbl> 1400, 1416, 1089, 1576, 762, 719, 1065, 229, 944, 733, 1028, 1005, 2475, 2565, 1389, 187, 22…
$ hour           <dbl> 5, 5, 5, 5, 6, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,…
$ minute         <dbl> 15, 29, 40, 45, 0, 58, 0, 0, 0, 0, 0, 0, 0, 0, 0, 59, 0, 0, 0, 0, 10, 5, 10, 10, 7, 0, 0, 10…
$ time_hour      <dttm> 2013-01-01 05:00:00, 2013-01-01 05:00:00, 2013-01-01 05:00:00, 2013-01-01 05:00:00, 2013-01…

dplyr basics

  • Pick observations by their values (filter()).
  • Reorder the rows (arrange()).
  • Pick variables by their names (select()).
  • Create new variables with functions of existing variables (mutate()).
  • Collapse many values down to a single summary (summarise()).

Filter rows with filter()

# Rows where month is 1 and day is 1; comma-separated conditions must all hold.
filter(flights, month == 1, day == 1)

dplyr functions never modify their inputs, so if you want to save the result, you’ll need to use the assignment operator, <-

# Store the filtered rows in jan1; the outer parentheses also print the value.
( jan1 <- filter(flights, month == 1, day == 1) )
NA

Comparisons

# Deliberately left commented out: `month = 1` uses `=` where the comparison
# operator `==` is intended.
# filter(flights, month = 1)
sqrt(2) ^ 2 == 2
[1] FALSE
1 / 49 * 49 == 1
[1] FALSE
#
near(sqrt(2) ^ 2,  2)
[1] TRUE
near(1 / 49 * 49, 1)
[1] TRUE

Logical operators

Boolean operators: & is “and”, | is “or”, and ! is “not”.

# Rows whose month is 11 or 12; each side of | is a complete comparison.
filter(flights, month == 11 | month == 12)

Example with %in%

# Same selection written as set membership; stored in nov_dec and printed.
( nov_dec <- filter(flights, month %in% c(11, 12)) )

If you wanted to find flights that weren’t delayed (on arrival or departure) by more than two hours, you could use either of the following two filters:

# Form 1: negate "either delay exceeds 120".
filter(flights, !(arr_delay > 120 | dep_delay > 120))
# Form 2: by De Morgan's law, require both delays to be at most 120.
filter(flights, arr_delay <= 120, dep_delay <= 120)

Missing values

NA > 5
[1] NA
#> [1] NA
10 == NA
[1] NA
#> [1] NA
NA + 10
[1] NA
#> [1] NA
NA / 2
[1] NA
#> [1] NA
NA == NA
#> [1] NA
x <- NA
y <- NA
x == y
#> [1] NA
# We don't know!
# is.na() is the way to test whether a value is missing.
is.na(x)
# Three-row tibble with one missing value, to show how filter() treats NA.
df <- tibble(x = c(1, NA, 3))
df
# x > 1 is NA for the missing row; filter() keeps only rows where the
# condition is TRUE, so that row is dropped.
filter(df, x > 1)
# Ask for missing values explicitly to keep them.
filter(df, is.na(x) | x > 1)

Arrange rows with arrange()

# Sort by year, then month, then day; later columns break ties in earlier ones.
arrange(flights, year, month, day)
# desc() reverses the order: largest dep_delay first.
arrange(flights, desc(dep_delay))

Missing values: arrange() always sorts them to the end, even with desc()

# Small tibble with one NA, to see where arrange() places missing values.
df <- tibble(x = c(5, 2, NA))
# Ascending order.
arrange(df, x)
# Descending order.
arrange(df, desc(x))

Select columns with select()

# Three columns, named individually.
select(flights, year, month, day)
# The same three columns as a range: every column from year through day.
select(flights, year:day)
# Every column except the year-through-day range.
select(flights, -(year:day))
# rename() takes new_name = old_name; here tailnum becomes tail_num.
rename(flights, tail_num = tailnum)
# Put time_hour and air_time first; everything() stands for the other columns.
select(flights, time_hour, air_time, everything())

Add new variables with mutate()

# Narrower table for the mutate() examples: the date columns, every column
# whose name ends in "delay", plus distance and air_time.
flights_sml <- select(flights, 
  year:day, 
  ends_with("delay"), 
  distance, 
  air_time
)
# Add two derived columns. The result is only printed, not assigned, so
# flights_sml itself is unchanged.
mutate(flights_sml,
  gain = dep_delay - arr_delay,
  # NOTE(review): * 60 turns a per-minute rate into a per-hour rate, assuming
  # air_time is in minutes - confirm in ?flights.
  speed = distance / air_time * 60
)

Note that you can refer to columns that you’ve just created:

# Later arguments may use columns defined earlier in the same mutate() call:
# gain_per_hour is built from gain and hours.
mutate(flights_sml,
  gain = dep_delay - arr_delay,
  hours = air_time / 60,
  gain_per_hour = gain / hours
)

If you only want to keep the new variables, use transmute():

# The same three derived columns, computed from the full flights table;
# transmute() returns only the columns created here.
transmute(flights,
  gain = dep_delay - arr_delay,
  hours = air_time / 60,
  gain_per_hour = gain / hours
)

Grouped summaries with summarise()

Group By

# Group the flights by calendar date; the outer parentheses print the
# grouped tibble. (It is built once and reused below.)
(by_day <- group_by(flights, year, month, day))
# Ungrouped input: summarise() collapses the whole table into a single row.
summarise(flights, delay = mean(dep_delay, na.rm = TRUE))
# Grouped input: the same summary, computed once per (year, month, day) group.
# na.rm = TRUE drops missing delays before averaging.
summarise(by_day, delay = mean(dep_delay, na.rm = TRUE))

Combining multiple operations with the pipe

# Per-destination summary: number of flights, mean distance and mean arrival
# delay (missing values ignored). Only destinations with more than 20 flights
# are kept, and "HNL" is excluded.
delay <- flights %>%
  group_by(dest) %>%
  summarise(
    count = n(),
    dist = mean(distance, na.rm = TRUE),
    delay = mean(arr_delay, na.rm = TRUE)
  ) %>%
  filter(count > 20 & dest != "HNL")

# Mean delay against mean distance: point size shows the flight count, and a
# smoothed trend is drawn without its confidence band.
delay %>%
  ggplot(aes(x = dist, y = delay)) +
  geom_point(aes(size = count), alpha = 1 / 3) +
  geom_smooth(se = FALSE)

Missing values

# Daily mean departure delay; na.rm = TRUE skips rows with a missing delay.
flights %>%
  group_by(year, month, day) %>%
  summarise(mean = mean(dep_delay, na.rm = TRUE))

# Drop every row where either delay is missing, so the summaries that follow
# need no na.rm argument.
not_cancelled <- filter(flights, !(is.na(dep_delay) | is.na(arr_delay)))

# Same daily mean, now computed on the cleaned table.
not_cancelled %>%
  group_by(year, month, day) %>%
  summarise(mean = mean(dep_delay))

The story is actually a little more nuanced. We can get more insight if we draw a scatterplot of number of flights vs. average delay:

# One row per tail number: mean arrival delay and number of flights.
delays <- not_cancelled %>%
  group_by(tailnum) %>%
  summarise(
    delay = mean(arr_delay, na.rm = TRUE),
    n = n()
  )

# Flight count against mean delay; heavy transparency because points overlap.
delays %>%
  ggplot(aes(x = n, y = delay)) +
  geom_point(alpha = 1 / 10)

The same plot, keeping only tail numbers with more than 25 flights (n > 25):

# Redraw the scatterplot using only the rows of delays where n exceeds 25.
ggplot(
  data = filter(delays, n > 25),
  mapping = aes(x = n, y = delay)
) +
  geom_point(alpha = 1 / 10)
# Optional trend line, currently disabled: append `+ geom_smooth(se = FALSE)`.

Useful summary functions

Measures of location: we’ve used mean(x), but median(x) is also useful.

# Two daily averages of arr_delay: over every row, and over positive values only.
not_cancelled %>% 
  group_by(year, month, day) %>% 
  summarise(
    avg_delay1 = mean(arr_delay),
    # Logical subsetting keeps just the elements greater than zero.
    avg_delay2 = mean(arr_delay[arr_delay > 0]) # the average positive delay
  )

Measures of spread: sd(x), IQR(x), mad(x)

# Standard deviation of distance for each destination, largest spread first.
not_cancelled %>% 
  group_by(dest) %>% 
  summarise(distance_sd = sd(distance)) %>% 
  arrange(desc(distance_sd))

Measures of rank: min(x), quantile(x, 0.25), max(x)

# When do the first and last flights leave each day?
# Answered by value: the smallest and largest dep_time within each day.
not_cancelled %>% 
  group_by(year, month, day) %>% 
  summarise(
    first = min(dep_time),
    last = max(dep_time)
  )

Measures of position: first(x), nth(x, 2), last(x)

# first() and last() pick by row position within each day, not by value, so
# the result depends on the row order of not_cancelled.
not_cancelled %>% 
  group_by(year, month, day) %>% 
  summarise(
    first_dep = first(dep_time), 
    last_dep = last(dep_time)
  )

Filtering on Ranks

# Within each day, rank departures so the largest dep_time gets rank 1, then
# keep only the rows holding the lowest and the highest rank. range(r) is
# c(min(r), max(r)).
not_cancelled %>% 
  group_by(year, month, day) %>% 
  mutate(r = min_rank(desc(dep_time))) %>% 
  filter(r %in% range(r))

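Counts: You’ve seen n(), which returns the size of the current group. To count the distinct values of a variable, use n_distinct(x).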

# Which destinations have the most carriers?
# n_distinct(carrier) counts the unique carrier values per destination;
# the result is sorted with the highest count first.
not_cancelled %>% 
  group_by(dest) %>% 
  summarise(carriers = n_distinct(carrier)) %>% 
  arrange(desc(carriers))

Counts are so useful that dplyr provides a simple helper if all you want is a count:

# One row per destination with its number of rows in not_cancelled.
not_cancelled %>% 
  count(dest)

Counts and proportions of logical values: sum(x > 10), mean(y == 0)

# How many flights left before 5am? (these usually indicate delayed
# flights from the previous day)
# dep_time < 500 is a logical vector; sum() counts its TRUE values.
not_cancelled %>% 
  group_by(year, month, day) %>% 
  summarise(n_early = sum(dep_time < 500))

# What proportion of flights are delayed by more than an hour?
# mean() of a logical vector is the share of TRUE values.
not_cancelled %>% 
  group_by(year, month, day) %>% 
  summarise(hour_prop = mean(arr_delay > 60))

Grouping by multiple variables

When you group by multiple variables, each summary peels off one level of the grouping. That makes it easy to progressively roll up a dataset:

# Start from flights grouped by the three date columns.
daily <- flights %>% group_by(year, month, day)

# Flights per day; this summary peels off the innermost grouping level (day).
per_day <- daily %>% summarise(flights = n())
per_day

# Roll the daily counts up to monthly totals.
per_month <- per_day %>% summarise(flights = sum(flights))
per_month

# Roll the monthly totals up to a yearly total.
per_year <- per_month %>% summarise(flights = sum(flights))
per_year
NA

Ungrouping

# Print the grouped tibble for reference.
daily

# Remove the date grouping first, so n() counts all rows in a single summary.
summarise(ungroup(daily), flights = n())

Grouped mutates (and filters)

  • Find the worst members of each group:
# rank(desc(arr_delay)) gives the smallest rank to the largest arr_delay within
# each day; keeping ranks below 10 retains the most-delayed rows of that day.
flights_sml %>% 
  group_by(year, month, day) %>%
  filter(rank(desc(arr_delay)) < 10)
  • Find all groups bigger than a threshold:
# Keep every row belonging to a destination with more than 365 rows. Inside a
# grouped filter(), n() is the size of the current dest group. The result stays
# grouped by dest, and the outer parentheses print it.
(popular_dests <- filter(group_by(flights, dest), n() > 365))
  • Standardise to compute per group metrics:
# popular_dests comes from a group_by(dest) pipeline, so sum(arr_delay) below
# is a per-destination total and prop_delay is each flight's share of it.
# Non-positive and missing delays are removed first.
popular_dests %>% 
  filter(arr_delay > 0) %>% 
  mutate(prop_delay = arr_delay / sum(arr_delay)) %>% 
  select(year:day, dest, arr_delay, prop_delay)
LS0tCnRpdGxlOiAiUi1MYWRpZXMnIC0gRGF0YSBNYW5pcHVsYXRpb24gd2l0aCBSIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tClNwZWFrZXI6IFJ1dGggQ2hpcmlub3MKCgojIERhdGEgdHJhbnNmb3JtYXRpb24KIyMgUHJlcmVxdWlzaXRlcwpgYGB7cn0KI2luc3RhbGwucGFja2FnZXMoImRwbHlyIikKbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkobnljZmxpZ2h0czEzKQpgYGAKCiMjIERhdGEgc2V0IG55Y2ZsaWdodHMxMyAKYGBge3J9CmZsaWdodHMKIwo/ZmxpZ2h0cwpgYGAKCk1vcmUgd2F5cyB0byBsZWFybiBiYXNpYyBpbmZvIG9uIGEgZGF0YS5mcmFtZS4KYGBge3J9Cm5hbWVzKGZsaWdodHMpCmRpbShmbGlnaHRzKSAgICAjID9kaW0gZGltZW5zaW9uCm5jb2woZmxpZ2h0cykgICAjID9uY29sIG51bWJlciBvZiBjb2x1bW5zCm5yb3coZmxpZ2h0cykgICAjID9ucm93IG51bWJlciBvZiByb3dzCmBgYAoKYGBge3J9CnN1bW1hcnkoZmxpZ2h0cykKI2luc3RhbGwucGFja2FnZXMoJ3NraW1yJykKbGlicmFyeShza2ltcikgCnNraW0oZmxpZ2h0cykKYGBgCgpWZXIgZWwgdGliYmxlIChkYXRhIGZyYW1lKQpgYGB7cn0KI1ZpZXcoZmxpZ2h0cykKZ2xpbXBzZShmbGlnaHRzKQpgYGAKCiMjIGRwbHlyIGJhc2ljcwotIFBpY2sgb2JzZXJ2YXRpb25zIGJ5IHRoZWlyIHZhbHVlcyAoZmlsdGVyKCkpLgotIFJlb3JkZXIgdGhlIHJvd3MgKGFycmFuZ2UoKSkuCi0gUGljayB2YXJpYWJsZXMgYnkgdGhlaXIgbmFtZXMgKHNlbGVjdCgpKS4KLSBDcmVhdGUgbmV3IHZhcmlhYmxlcyB3aXRoIGZ1bmN0aW9ucyBvZiBleGlzdGluZyB2YXJpYWJsZXMgKG11dGF0ZSgpKS4KLSBDb2xsYXBzZSBtYW55IHZhbHVlcyBkb3duIHRvIGEgc2luZ2xlIHN1bW1hcnkgKHN1bW1hcmlzZSgpKS4KCiMjIyBGaWx0ZXIgcm93cyB3aXRoIGZpbHRlcigpCgpgYGB7cn0KZmlsdGVyKGZsaWdodHMsIG1vbnRoID09IDEsIGRheSA9PSAxKQpgYGAKCmRwbHlyIGZ1bmN0aW9ucyBuZXZlciBtb2RpZnkgdGhlaXIgaW5wdXRzLCBzbyBpZiB5b3Ugd2FudCB0byBzYXZlIHRoZSByZXN1bHQsIHlvdeKAmWxsIG5lZWQgdG8gdXNlIHRoZSBhc3NpZ25tZW50IG9wZXJhdG9yLCA8LQpgYGB7cn0KKCBqYW4xIDwtIGZpbHRlcihmbGlnaHRzLCBtb250aCA9PSAxLCBkYXkgPT0gMSkgKQoKYGBgCiMjIyBDb21wYXJpc29ucwoKCmBgYHtyfQojIGZpbHRlcihmbGlnaHRzLCBtb250aCA9IDEpCmBgYApgYGB7cn0Kc3FydCgyKSBeIDIgPT0gMgoxIC8gNDkgKiA0OSA9PSAxCiMKbmVhcihzcXJ0KDIpIF4gMiwgIDIpCm5lYXIoMSAvIDQ5ICogNDksIDEpCmBgYAojIyMgTG9naWNhbCBvcGVyYXRvcnMKCgpCb29sZWFuIG9wZXJhdG9yczogJiBpcyDigJxhbmTigJ0sIHwgaXMg4oCcb3LigJ0sIGFuZCAhIGlzIOKAnG5v4oCdCmBgYHtyfQpmaWx0ZXIoZmxpZ2h0cywgbW9udGggPT0gMTEgfCBtb250aCA9PSAxMikKYGBgCkV4YW1wbGUgd2l0aCAlaW4lCmBgYHtyfQooIG5vdl9kZWMgPC0g
ZmlsdGVyKGZsaWdodHMsIG1vbnRoICVpbiUgYygxMSwgMTIpKSApCmBgYApJZiB5b3Ugd2FudGVkIHRvIGZpbmQgZmxpZ2h0cyB0aGF0IHdlcmVu4oCZdCBkZWxheWVkIChvbiBhcnJpdmFsIG9yIGRlcGFydHVyZSkgYnkgbW9yZSB0aGFuIHR3byBob3VycywgeW91IGNvdWxkIHVzZSBlaXRoZXIgb2YgdGhlIGZvbGxvd2luZyB0d28gZmlsdGVyczoKCmBgYHtyfQpmaWx0ZXIoZmxpZ2h0cywgIShhcnJfZGVsYXkgPiAxMjAgfCBkZXBfZGVsYXkgPiAxMjApKQpmaWx0ZXIoZmxpZ2h0cywgYXJyX2RlbGF5IDw9IDEyMCwgZGVwX2RlbGF5IDw9IDEyMCkKYGBgCgojIyMgTWlzc2luZyB2YWx1ZXMKCmBgYHtyfQpOQSA+IDUKIz4gWzFdIE5BCjEwID09IE5BCiM+IFsxXSBOQQpOQSArIDEwCiM+IFsxXSBOQQpOQSAvIDIKIz4gWzFdIE5BCmBgYAoKYGBge3J9Ck5BID09IE5BCiM+IFsxXSBOQQpgYGAKYGBge3J9CnggPC0gTkEKeSA8LSBOQQp4ID09IHkKIz4gWzFdIE5BCiMgV2UgZG9uJ3Qga25vdyEKYGBgCmBgYHtyfQppcy5uYSh4KQpgYGAKYGBge3J9CmRmIDwtIHRpYmJsZSh4ID0gYygxLCBOQSwgMykpCmRmCmZpbHRlcihkZiwgeCA+IDEpCmZpbHRlcihkZiwgaXMubmEoeCkgfCB4ID4gMSkKYGBgCgojIyBBcnJhbmdlIHJvd3Mgd2l0aCBhcnJhbmdlKCkKCmBgYHtyfQphcnJhbmdlKGZsaWdodHMsIHllYXIsIG1vbnRoLCBkYXkpCmBgYApgYGB7cn0KYXJyYW5nZShmbGlnaHRzLCBkZXNjKGRlcF9kZWxheSkpCmBgYApNaXNzaW5nIFZhbHVlcwpgYGB7cn0KZGYgPC0gdGliYmxlKHggPSBjKDUsIDIsIE5BKSkKYXJyYW5nZShkZiwgeCkKYXJyYW5nZShkZiwgZGVzYyh4KSkKYGBgCiMjIFNlbGVjdCBjb2x1bW5zIHdpdGggc2VsZWN0KCkKCmBgYHtyfQpzZWxlY3QoZmxpZ2h0cywgeWVhciwgbW9udGgsIGRheSkKYGBgCmBgYHtyfQpzZWxlY3QoZmxpZ2h0cywgeWVhcjpkYXkpCmBgYApgYGB7cn0Kc2VsZWN0KGZsaWdodHMsIC0oeWVhcjpkYXkpKQpgYGAKYGBge3J9CnJlbmFtZShmbGlnaHRzLCB0YWlsX251bSA9IHRhaWxudW0pCmBgYApgYGB7cn0Kc2VsZWN0KGZsaWdodHMsIHRpbWVfaG91ciwgYWlyX3RpbWUsIGV2ZXJ5dGhpbmcoKSkKYGBgCgojIyBBZGQgbmV3IHZhcmlhYmxlcyB3aXRoIG11dGF0ZSgpCgpgYGB7cn0KZmxpZ2h0c19zbWwgPC0gc2VsZWN0KGZsaWdodHMsIAogIHllYXI6ZGF5LCAKICBlbmRzX3dpdGgoImRlbGF5IiksIAogIGRpc3RhbmNlLCAKICBhaXJfdGltZQopCm11dGF0ZShmbGlnaHRzX3NtbCwKICBnYWluID0gZGVwX2RlbGF5IC0gYXJyX2RlbGF5LAogIHNwZWVkID0gZGlzdGFuY2UgLyBhaXJfdGltZSAqIDYwCikKYGBgCgpOb3RlIHRoYXQgeW91IGNhbiByZWZlciB0byBjb2x1bW5zIHRoYXQgeW914oCZdmUganVzdCBjcmVhdGVkOgpgYGB7cn0KbXV0YXRlKGZsaWdodHNfc21sLAogIGdhaW4gPSBkZXBfZGVsYXkgLSBhcnJfZGVsYXksCiAgaG91cnMgPSBhaXJfdGltZSAvIDYwLAogIGdhaW5fcGVyX2hvdXIgPSBn
YWluIC8gaG91cnMKKQpgYGAKCklmIHlvdSBvbmx5IHdhbnQgdG8ga2VlcCB0aGUgbmV3IHZhcmlhYmxlcywgdXNlIHRyYW5zbXV0ZSgpOgpgYGB7cn0KdHJhbnNtdXRlKGZsaWdodHMsCiAgZ2FpbiA9IGRlcF9kZWxheSAtIGFycl9kZWxheSwKICBob3VycyA9IGFpcl90aW1lIC8gNjAsCiAgZ2Fpbl9wZXJfaG91ciA9IGdhaW4gLyBob3VycwopCmBgYAojIyBHcm91cGVkIHN1bW1hcmllcyB3aXRoIHN1bW1hcmlzZSgpCgojIyMgR3JvdXAgQnkKYGBge3J9CihieV9kYXkgPC0gZ3JvdXBfYnkoZmxpZ2h0cywgeWVhciwgbW9udGgsIGRheSkpCmBgYAoKCmBgYHtyfQpzdW1tYXJpc2UoZmxpZ2h0cywgZGVsYXkgPSBtZWFuKGRlcF9kZWxheSwgbmEucm0gPSBUUlVFKSkKYGBgCmBgYHtyfQooYnlfZGF5IDwtIGdyb3VwX2J5KGZsaWdodHMsIHllYXIsIG1vbnRoLCBkYXkpKQpzdW1tYXJpc2UoYnlfZGF5LCBkZWxheSA9IG1lYW4oZGVwX2RlbGF5LCBuYS5ybSA9IFRSVUUpKQpgYGAKCiMjIyBDb21iaW5pbmcgbXVsdGlwbGUgb3BlcmF0aW9ucyB3aXRoIHRoZSBwaXBlCgpgYGB7cn0KZGVsYXkgPC0gZmxpZ2h0cyAlPiUgCiAgZ3JvdXBfYnkoZGVzdCkgJT4lIAogIHN1bW1hcmlzZSgKICAgIGNvdW50ID0gbigpLAogICAgZGlzdCA9IG1lYW4oZGlzdGFuY2UsIG5hLnJtID0gVFJVRSksCiAgICBkZWxheSA9IG1lYW4oYXJyX2RlbGF5LCBuYS5ybSA9IFRSVUUpCiAgKSAlPiUgCiAgZmlsdGVyKGNvdW50ID4gMjAsIGRlc3QgIT0gIkhOTCIpCiMKZ2dwbG90KGRhdGEgPSBkZWxheSwgbWFwcGluZyA9IGFlcyh4ID0gZGlzdCwgeSA9IGRlbGF5KSkgKwogIGdlb21fcG9pbnQoYWVzKHNpemUgPSBjb3VudCksIGFscGhhID0gMS8zKSArCiAgZ2VvbV9zbW9vdGgoc2UgPSBGQUxTRSkKYGBgCiMjIyBNaXNzaW5nIHZhbHVlcwpgYGB7cn0KZmxpZ2h0cyAlPiUgCiAgZ3JvdXBfYnkoeWVhciwgbW9udGgsIGRheSkgJT4lIAogIHN1bW1hcmlzZShtZWFuID0gbWVhbihkZXBfZGVsYXksIG5hLnJtID0gVFJVRSkpCmBgYAoKYGBge3J9Cm5vdF9jYW5jZWxsZWQgPC0gZmxpZ2h0cyAlPiUgCiAgZmlsdGVyKCFpcy5uYShkZXBfZGVsYXkpLCAhaXMubmEoYXJyX2RlbGF5KSkKIwpub3RfY2FuY2VsbGVkICU+JSAKICBncm91cF9ieSh5ZWFyLCBtb250aCwgZGF5KSAlPiUgCiAgc3VtbWFyaXNlKG1lYW4gPSBtZWFuKGRlcF9kZWxheSkpCmBgYAoKVGhlIHN0b3J5IGlzIGFjdHVhbGx5IGEgbGl0dGxlIG1vcmUgbnVhbmNlZC4gV2UgY2FuIGdldCBtb3JlIGluc2lnaHQgaWYgd2UgZHJhdyBhIHNjYXR0ZXJwbG90IG9mIG51bWJlciBvZiBmbGlnaHRzIHZzLiBhdmVyYWdlIGRlbGF5OgpgYGB7cn0KZGVsYXlzIDwtIG5vdF9jYW5jZWxsZWQgJT4lIAogIGdyb3VwX2J5KHRhaWxudW0pICU+JSAKICBzdW1tYXJpc2UoCiAgICBkZWxheSA9IG1lYW4oYXJyX2RlbGF5LCBuYS5ybSA9IFRSVUUpLAogICAgbiA9IG4oKQogICkKCmdncGxvdChkYXRhID0gZGVsYXlzLCBtYXBwaW5n
ID0gYWVzKHggPSBuLCB5ID0gZGVsYXkpKSArIAogIGdlb21fcG9pbnQoYWxwaGEgPSAxLzEwKQpgYGAKCldpdGggYSBmaWx0ZXIgbiA+IDI1CmBgYHtyfQpkZWxheXMgJT4lIAogIGZpbHRlcihuID4gMjUpICU+JSAKICBnZ3Bsb3QobWFwcGluZyA9IGFlcyh4ID0gbiwgeSA9IGRlbGF5KSkgKyAKICAgIGdlb21fcG9pbnQoYWxwaGEgPSAxLzEwKSAjICsKICAjZ2VvbV9zbW9vdGgoc2UgPSBGQUxTRSkKYGBgCgojIyMgVXNlZnVsIHN1bW1hcnkgZnVuY3Rpb25zCgpNZWFzdXJlcyBvZiBsb2NhdGlvbjogd2XigJl2ZSB1c2VkIG1lYW4oeCksIGJ1dCBtZWRpYW4oeCkgaXMgYWxzbyB1c2VmdWwuCmBgYHtyfQpub3RfY2FuY2VsbGVkICU+JSAKICBncm91cF9ieSh5ZWFyLCBtb250aCwgZGF5KSAlPiUgCiAgc3VtbWFyaXNlKAogICAgYXZnX2RlbGF5MSA9IG1lYW4oYXJyX2RlbGF5KSwKICAgIGF2Z19kZWxheTIgPSBtZWFuKGFycl9kZWxheVthcnJfZGVsYXkgPiAwXSkgIyB0aGUgYXZlcmFnZSBwb3NpdGl2ZSBkZWxheQogICkKYGBgCgpNZWFzdXJlcyBvZiBzcHJlYWQ6IHNkKHgpLCBJUVIoeCksIG1hZCh4KQpgYGB7cn0Kbm90X2NhbmNlbGxlZCAlPiUgCiAgZ3JvdXBfYnkoZGVzdCkgJT4lIAogIHN1bW1hcmlzZShkaXN0YW5jZV9zZCA9IHNkKGRpc3RhbmNlKSkgJT4lIAogIGFycmFuZ2UoZGVzYyhkaXN0YW5jZV9zZCkpCmBgYAoKTWVhc3VyZXMgb2YgcmFuazogbWluKHgpLCBxdWFudGlsZSh4LCAwLjI1KSwgbWF4KHgpCmBgYHtyfQojIFdoZW4gZG8gdGhlIGZpcnN0IGFuZCBsYXN0IGZsaWdodHMgbGVhdmUgZWFjaCBkYXk/Cm5vdF9jYW5jZWxsZWQgJT4lIAogIGdyb3VwX2J5KHllYXIsIG1vbnRoLCBkYXkpICU+JSAKICBzdW1tYXJpc2UoCiAgICBmaXJzdCA9IG1pbihkZXBfdGltZSksCiAgICBsYXN0ID0gbWF4KGRlcF90aW1lKQogICkKYGBgCk1lYXN1cmVzIG9mIHBvc2l0aW9uOiBmaXJzdCh4KSwgbnRoKHgsIDIpLCBsYXN0KHgpCmBgYHtyfQpub3RfY2FuY2VsbGVkICU+JSAKICBncm91cF9ieSh5ZWFyLCBtb250aCwgZGF5KSAlPiUgCiAgc3VtbWFyaXNlKAogICAgZmlyc3RfZGVwID0gZmlyc3QoZGVwX3RpbWUpLCAKICAgIGxhc3RfZGVwID0gbGFzdChkZXBfdGltZSkKICApCmBgYAoKRmlsdGVyaW5nIG9uIFJhbmtzCmBgYHtyfQpub3RfY2FuY2VsbGVkICU+JSAKICBncm91cF9ieSh5ZWFyLCBtb250aCwgZGF5KSAlPiUgCiAgbXV0YXRlKHIgPSBtaW5fcmFuayhkZXNjKGRlcF90aW1lKSkpICU+JSAKICBmaWx0ZXIociAlaW4lIHJhbmdlKHIpKQpgYGAKCkNvdW50czogWW914oCZdmUgc2VlbiBuKCksCmBgYHtyfQojIFdoaWNoIGRlc3RpbmF0aW9ucyBoYXZlIHRoZSBtb3N0IGNhcnJpZXJzPwpub3RfY2FuY2VsbGVkICU+JSAKICBncm91cF9ieShkZXN0KSAlPiUgCiAgc3VtbWFyaXNlKGNhcnJpZXJzID0gbl9kaXN0aW5jdChjYXJyaWVyKSkgJT4lIAogIGFycmFuZ2UoZGVzYyhjYXJyaWVycykpCmBgYAoKQ291bnRzIGFyZSBz
byB1c2VmdWwgdGhhdCBkcGx5ciBwcm92aWRlcyBhIHNpbXBsZSBoZWxwZXIgaWYgYWxsIHlvdSB3YW50IGlzIGEgY291bnQ6CmBgYHtyfQpub3RfY2FuY2VsbGVkICU+JSAKICBjb3VudChkZXN0KQpgYGAKCkNvdW50cyBhbmQgcHJvcG9ydGlvbnMgb2YgbG9naWNhbCB2YWx1ZXM6IHN1bSh4ID4gMTApLCBtZWFuKHkgPT0gMCkKYGBge3J9CiMgSG93IG1hbnkgZmxpZ2h0cyBsZWZ0IGJlZm9yZSA1YW0/ICh0aGVzZSB1c3VhbGx5IGluZGljYXRlIGRlbGF5ZWQKIyBmbGlnaHRzIGZyb20gdGhlIHByZXZpb3VzIGRheSkKbm90X2NhbmNlbGxlZCAlPiUgCiAgZ3JvdXBfYnkoeWVhciwgbW9udGgsIGRheSkgJT4lIAogIHN1bW1hcmlzZShuX2Vhcmx5ID0gc3VtKGRlcF90aW1lIDwgNTAwKSkKCiMgV2hhdCBwcm9wb3J0aW9uIG9mIGZsaWdodHMgYXJlIGRlbGF5ZWQgYnkgbW9yZSB0aGFuIGFuIGhvdXI/Cm5vdF9jYW5jZWxsZWQgJT4lIAogIGdyb3VwX2J5KHllYXIsIG1vbnRoLCBkYXkpICU+JSAKICBzdW1tYXJpc2UoaG91cl9wcm9wID0gbWVhbihhcnJfZGVsYXkgPiA2MCkpCmBgYAoKIyMjIEdyb3VwaW5nIGJ5IG11bHRpcGxlIHZhcmlhYmxlcwpXaGVuIHlvdSBncm91cCBieSBtdWx0aXBsZSB2YXJpYWJsZXMsIGVhY2ggc3VtbWFyeSBwZWVscyBvZmYgb25lIGxldmVsIG9mIHRoZSBncm91cGluZy4gVGhhdCBtYWtlcyBpdCBlYXN5IHRvIHByb2dyZXNzaXZlbHkgcm9sbCB1cCBhIGRhdGFzZXQ6CmBgYHtyfQpkYWlseSA8LSBncm91cF9ieShmbGlnaHRzLCB5ZWFyLCBtb250aCwgZGF5KQoocGVyX2RheSAgIDwtIHN1bW1hcmlzZShkYWlseSwgZmxpZ2h0cyA9IG4oKSkpCiMKKHBlcl9tb250aCA8LSBzdW1tYXJpc2UocGVyX2RheSwgZmxpZ2h0cyA9IHN1bShmbGlnaHRzKSkpCiMKKHBlcl95ZWFyICA8LSBzdW1tYXJpc2UocGVyX21vbnRoLCBmbGlnaHRzID0gc3VtKGZsaWdodHMpKSkKCmBgYAoKCiMjIyBVbmdyb3VwaW5nCmBgYHtyfQpkYWlseQojCmRhaWx5ICU+JSAKICB1bmdyb3VwKCkgJT4lICAgICAgICAgICAgICMgbm8gbG9uZ2VyIGdyb3VwZWQgYnkgZGF0ZQogIHN1bW1hcmlzZShmbGlnaHRzID0gbigpKSAgIyBhbGwgZmxpZ2h0cwpgYGAKCiMjIyBHcm91cGVkIG11dGF0ZXMgKGFuZCBmaWx0ZXJzKQotIEZpbmQgdGhlIHdvcnN0IG1lbWJlcnMgb2YgZWFjaCBncm91cDoKYGBge3J9CmZsaWdodHNfc21sICU+JSAKICBncm91cF9ieSh5ZWFyLCBtb250aCwgZGF5KSAlPiUKICBmaWx0ZXIocmFuayhkZXNjKGFycl9kZWxheSkpIDwgMTApCmBgYAoKLSBGaW5kIGFsbCBncm91cHMgYmlnZ2VyIHRoYW4gYSB0aHJlc2hvbGQ6CmBgYHtyfQpwb3B1bGFyX2Rlc3RzIDwtIGZsaWdodHMgJT4lIAogIGdyb3VwX2J5KGRlc3QpICU+JSAKICBmaWx0ZXIobigpID4gMzY1KQojCnBvcHVsYXJfZGVzdHMKYGBgCgotIFN0YW5kYXJkaXNlIHRvIGNvbXB1dGUgcGVyIGdyb3VwIG1ldHJpY3M6CmBgYHtyfQpwb3B1bGFyX2Rlc3RzICU+JSAKICBmaWx0
ZXIoYXJyX2RlbGF5ID4gMCkgJT4lIAogIG11dGF0ZShwcm9wX2RlbGF5ID0gYXJyX2RlbGF5IC8gc3VtKGFycl9kZWxheSkpICU+JSAKICAgIHNlbGVjdCh5ZWFyOmRheSwgZGVzdCwgYXJyX2RlbGF5LCBwcm9wX2RlbGF5KQpgYGAKCgoK